install.packages('corpcor')
install.packages('mctest')
install.packages('pROC')
install.packages('caret')
require(pROC)
require(mgcv)
require(corpcor)
require(mctest)
require(rpart)
require(randomForest)
require(MultivariateRandomForest)
require(boot)
require(plyr)
require(hydroGOF)
require(MASS)
require(caret)

# Import the three buy-box data sets: all sellers, the focal 3P seller and
# Amazon. read.csv() already defaults to header = TRUE and sep = ",".
all_sellerdata <- read.csv(
  "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\bbox_allsellers_breville.csv"
)
focal3Pdata <- read.csv(
  "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\bbox_focal3pseller_breville.csv"
)
amzndata <- read.csv(
  "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\bbox_amzn_breville.csv"
)

# Put the all-seller columns on the search path so they can be referenced by
# bare name below; the data frame is detached again later in the script.
attach(all_sellerdata)

# Predictor matrix: one column per feature, named after the source column.
trainX <- cbind(
  pricediff_tolowest,
  priceratio_tolowest,
  priceratio_toprevious,
  dayofmonth,
  seller_rate,
  num_seller_Posrating,
  num_seller_ratings,
  FBA,
  Freeship,
  Prime
)

# Response: the buy-box win indicator column.
trainY <- buybox_winner_likeli

# Missing predictor values are replaced by zero.
trainX[is.na(trainX)] <- 0


# VARIABLE TREATMENT

# VARIANCE CHECK
# Sample variance of every predictor column, in column order.
variance_X <- vapply(
  seq_len(ncol(trainX)),
  function(col_idx) var(trainX[, col_idx]),
  numeric(1)
)

# Number of predictors whose variance is not exactly zero.
nonzero_var <- ncol(trainX) - sum(as.numeric(variance_X == 0))

# Hold-out split: rows 1..1776 are the training sample, the remaining rows are
# the test sample.
trainX1 <- trainX[1:1776, ]
testX1 <- trainX[1777:nrow(trainX), ]
trainY1 <- trainY[1:1776]
testY1 <- trainY[1777:nrow(trainX)]

# Forest dimensions and bookkeeping.
ntree <- 500
N <- nrow(trainX1)
ntest <- nrow(testX1)
dimX1 <- ncol(trainX1)
dimY1 <- 1
# Each bootstrap sample holds two thirds of the training rows.
samplsize <- round((2 / 3) * nrow(trainX1))
all_index <- c(1:N)

# Pre-allocated containers, all zero-filled:
#   results1           - sampled row indices, one column per tree
#   VIM_matrix1        - per-tree variable importance, one column per tree
#   trainX1_bootstrap  - bootstrapped predictors (row x feature x tree)
#   trainY1_bootstrap  - bootstrapped responses, one column per tree
#   predict_test_array - per-tree test predictions, one column per tree
results1 <- matrix(0, samplsize, ntree)
VIM_matrix1 <- matrix(0, dimX1, ntree)
avg_VIM1 <- rep(0, dimX1)
trainX1_bootstrap <- array(0, dim = c(samplsize, dimX1, ntree))
trainY1_bootstrap <- matrix(0, samplsize, ntree)
predict_test_array <- matrix(0, ntest, ntree)

# Draw one bootstrap sample (with replacement) per tree and materialise the
# corresponding predictor rows and responses. sample() is called once per
# tree, in tree order, so the random draws match a loop that samples first
# and copies afterwards.
for (i in seq_len(ntree)) {
  results1[, i] <- sample(1:N, size = samplsize, replace = TRUE)
  trainX1_bootstrap[, , i] <- trainX1[results1[, i], ]
  trainY1_bootstrap[, i] <- trainY1[results1[, i]]
}


# Reload every saved tree, score the hold-out rows with it and record the
# tree's variable importance.
for (i in seq_len(ntree)) {
  # Commented-out calls kept from the original script; presumably this is how
  # each tree was grown and saved before being reloaded here (TODO confirm).
  # tree = build_single_tree(trainX1_bootstrap[,,i], trainY1_bootstrap[,i,drop=FALSE], m_feature=5, min_leaf=10, Inv_Cov_Y = matrix(1,1,1), Command=1)
  # save(tree,file = myfile1)

  myfile1 <- file.path(
    "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\",
    paste0("tree", i, ".RData")
  )

  # load() returns the name(s) of the restored object(s); get() fetches the
  # object stored under that name.
  tree <- get(load(myfile1))

  # Column i holds tree i's output for the hold-out predictors.
  predict_test_array[, i] <- single_tree_prediction(
    tree,
    X_test = testX1,
    Variable_number = dimY1
  )

  # Variable importance of this tree as computed by the package.
  VIM_matrix1[, i] <- t(variable_importance_measure(tree, dimX1))

  # Progress indicator.
  print(i)
} # end of the per-tree loop


# Forest prediction for each hold-out row: mean of the per-tree predictions.
avg_predict <- vapply(
  seq_len(ntest),
  function(row_idx) mean(predict_test_array[row_idx, ]),
  numeric(1)
)

# Get the best threshold
# NOTE(review): pROC's roc() argument is lower-case `auc`; `AUC` and
# `best.weights` appear to be swallowed by `...` here - confirm.
r <- roc(testY1, avg_predict, AUC = TRUE, best.weights = c(1, 0.04))

thres <- coords(
  r,
  "best",
  ret = "threshold",
  transpose = TRUE,
  best.method = "closest.topleft",
  best.weights = c(1, 0.04)
)
# The value found above (0.351852 according to the original note) is replaced
# by the hard-coded constant, which every later section of the script uses.
thres <- 0.351852

# Binary win prediction: 1 when the averaged prediction exceeds the threshold.
predictallsellers_bboxwin <- vapply(
  avg_predict,
  function(p) if (p > thres) 1 else 0,
  numeric(1)
)

# Error summaries of the averaged prediction against the observed response:
# mean absolute error, mean squared error, and the standard deviation of the
# squared errors.
MAE_mat <- mean(abs(avg_predict - testY1))
MSE_mat <- mean((avg_predict - testY1)^2)
SDSE_mat <- sd((avg_predict - testY1)^2)

# Average variable importance of each predictor across all trees.
avg_VIM <- vapply(
  seq_len(dimX1),
  function(var_idx) mean(VIM_matrix1[var_idx, ]),
  numeric(1)
)


# Confusion matrix of the thresholded predictions against the observed
# hold-out response (both converted to factors).
confusionmat <- confusionMatrix(
  data = as.factor(predictallsellers_bboxwin),
  reference = as.factor(testY1)
)

# Export the error summaries, the averaged predictions, the thresholded
# predictions (each next to the observed response) and the averaged variable
# importance.
write.csv(
  c(MAE_mat, MSE_mat, SDSE_mat),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\MAE_MSE_RFBuybox.csv"
)
write.csv(
  cbind(avg_predict, testY1),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictionallsellers_RFBuybox.csv"
)
write.csv(
  cbind(predictallsellers_bboxwin, testY1),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictedvaluesallsellers_RFBuybox.csv"
)
write.csv(
  avg_VIM,
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\avg_VIM_RFBuybox.csv"
)


# Take the all-seller columns off the search path and drop the data frame
# from the workspace. detach() matches on the name only, so the two steps do
# not depend on each other.
detach(all_sellerdata)
rm(all_sellerdata)

# USING FOCAL 3P SELLER AS THE VALIDATION FOR THE BUY BOX WINNER PREDICTION RANDOM FOREST
# Scores the focal 3P seller data with the saved trees, thresholds the
# averaged prediction with `thres`, and writes predictions and error
# summaries to CSV.
attach(focal3Pdata)

# Same ten predictors, in the same order, as the training matrix.
# NOTE(review): unlike trainX, missing values are not zero-filled here -
# confirm the file contains no NAs.
testX_focalseller <- cbind(
  pricediff_tolowest,
  priceratio_tolowest,
  priceratio_toprevious,
  dayofmonth,
  seller_rate,
  num_seller_Posrating,
  num_seller_ratings,
  FBA,
  Freeship,
  Prime
)

testY_focalseller <- buybox_winner_likeli
ntest1 <- nrow(testX_focalseller)
predict_test_array <- matrix(0, ntest1, ntree)
dimY1 <- 1

for (i in seq_len(ntree)) {
  # Loading the saved tree; the original author notes that get(load()) works
  # in their setup whereas a bare load() does not.
  myfile1 <- file.path(
    "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\",
    paste0("tree", i, ".RData")
  )
  tree <- get(load(myfile1))

  # Column i holds tree i's output for the focal-seller predictors.
  predict_test_array[, i] <- single_tree_prediction(
    tree,
    X_test = testX_focalseller,
    Variable_number = dimY1
  )
} # end of the per-tree loop

# Forest prediction per row = mean over trees; win = 1 above the threshold.
avg_predict_focal3p <- vapply(
  seq_len(ntest1),
  function(row_idx) mean(predict_test_array[row_idx, ]),
  numeric(1)
)
predict_bboxwin <- ifelse(avg_predict_focal3p > thres, 1, 0)

# Mean absolute error, mean squared error and standard deviation of the
# squared errors against the observed response.
MAE_focal3P <- mean(abs(avg_predict_focal3p - testY_focalseller))
MSE_focal3P <- mean((avg_predict_focal3p - testY_focalseller)^2)
SDSE_focal3P <- sd((avg_predict_focal3p - testY_focalseller)^2)

# Creating confusion matrix
confusionmat <- confusionMatrix(
  data = as.factor(predict_bboxwin),
  reference = as.factor(testY_focalseller)
)

# cbind() keeps prediction and observed value side by side, one row per
# observation, matching the all-seller export. The previous c() call stacked
# both vectors into a single column of length 2 * ntest1.
write.csv(
  cbind(avg_predict_focal3p, testY_focalseller),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictionfocal3PRFBuybox.csv"
)

write.csv(
  c(MAE_focal3P, MSE_focal3P, SDSE_focal3P),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\MAE_MSE_focal3PRFBuybox.csv"
)

write.csv(
  predict_bboxwin,
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictedvaluesfocal3PRFBuybox.csv"
)

# Detach the focal seller columns from the search path and drop the data.
detach(focal3Pdata)
rm(focal3Pdata)

# validating on Amazon data
# Scores the Amazon data with the saved trees, thresholds the averaged
# prediction with `thres`, and writes predictions and error summaries to CSV.
# amzndata stays attached after this section; it is detached further down the
# script.
attach(amzndata)

# Same ten predictors, in the same order, as the training matrix.
# NOTE(review): unlike trainX, missing values are not zero-filled here -
# confirm the file contains no NAs.
testX_amzn <- cbind(
  pricediff_tolowest,
  priceratio_tolowest,
  priceratio_toprevious,
  dayofmonth,
  seller_rate,
  num_seller_Posrating,
  num_seller_ratings,
  FBA,
  Freeship,
  Prime
)

testY_amzn <- buybox_winner_likeli
ntest1 <- nrow(testX_amzn)
predict_test_array <- matrix(0, ntest1, ntree)
dimY1 <- 1

for (i in seq_len(ntree)) {
  # Loading the saved tree; the original author notes that get(load()) works
  # in their setup whereas a bare load() does not.
  myfile1 <- file.path(
    "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\",
    paste0("tree", i, ".RData")
  )
  tree <- get(load(myfile1))

  # Column i holds tree i's output for the Amazon predictors.
  predict_test_array[, i] <- single_tree_prediction(
    tree,
    X_test = testX_amzn,
    Variable_number = dimY1
  )
} # end of the per-tree loop

# Forest prediction per row = mean over trees; win = 1 above the threshold.
avg_predict_amzn <- vapply(
  seq_len(ntest1),
  function(row_idx) mean(predict_test_array[row_idx, ]),
  numeric(1)
)
predict_bboxwin <- ifelse(avg_predict_amzn > thres, 1, 0)

# Mean absolute error, mean squared error and standard deviation of the
# squared errors against the observed response.
MAE_amzn <- mean(abs(avg_predict_amzn - testY_amzn))
MSE_amzn <- mean((avg_predict_amzn - testY_amzn)^2)
SDSE_amzn <- sd((avg_predict_amzn - testY_amzn)^2)

# Creating confusion matrix
confusionmat <- confusionMatrix(
  data = as.factor(predict_bboxwin),
  reference = as.factor(testY_amzn)
)

# cbind() keeps prediction and observed value side by side, one row per
# observation, matching the all-seller export. The previous c() call stacked
# both vectors into a single column of length 2 * ntest1.
write.csv(
  cbind(avg_predict_amzn, testY_amzn),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictionamznRFBuybox.csv"
)

write.csv(
  c(MAE_amzn, MSE_amzn, SDSE_amzn),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\MAE_MSE_amznRFBuybox.csv"
)

write.csv(
  cbind(predict_bboxwin, testY_amzn),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictedvaluesamznRFBuybox.csv"
)


# ROC objects (in percent) for the three evaluation sets: all-seller hold-out,
# focal 3P seller and Amazon.
roc.pred1 <- roc(
  testY1,
  avg_predict,
  percent = TRUE,
  main = "Smoothing"
)
roc.pred2 <- roc(
  testY_focalseller,
  avg_predict_focal3p,
  percent = TRUE,
  main = "Smoothing"
)
roc.pred3 <- roc(
  testY_amzn,
  avg_predict_amzn,
  percent = TRUE,
  main = "Smoothing"
)

# Draw the hold-out curve first, then overlay the other two on the same axes.
# The trailing percentages are the values recorded in the original script.
plot.roc(
  roc.pred1,
  percent = TRUE,
  main = "ROC Curve for Buy Box win Prediction",
  col = "blue",
  add = FALSE,
  asp = NA,
  print.auc = TRUE
) # 92.8%
lines.roc(
  roc.pred2,
  type = "l",
  lty = 1,
  col = "red",
  lwd = 2,
  print.auc = TRUE
) # 82.4%
lines.roc(
  roc.pred3,
  type = "l",
  lty = 1,
  col = "green",
  lwd = 2,
  print.auc = TRUE
) # 95.7%


# reading focal 3P seller prescribed price to get probability of winning buy box
# Scores the focal seller's prescribed-price covariates with the saved trees
# and writes the thresholded prediction next to the observed outcome.
detach(amzndata)
focal3P_prescribedprice <- read.csv(
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\Managerial Implications\\bboxfocal3p_prescribcovar_breville.csv",
  header = TRUE,
  sep = ","
)
# Only rows 2..185 of the file are used.
focal3P_prescribed <- focal3P_prescribedprice[2:185, ]

attach(focal3P_prescribed)

ntree <- 500
# Same ten predictors, in the same order, as the training matrix.
# NOTE(review): unlike trainX, missing values are not zero-filled here -
# confirm the file contains no NAs.
testX_focalseller <- cbind(
  pricediff_tolowest,
  priceratio_tolowest,
  priceratio_toprevious,
  dayofmonth,
  seller_rate,
  num_seller_Posrating,
  num_seller_ratings,
  FBA,
  Freeship,
  Prime
)

# win likelihood with old price and not using forecasting tool
testY_focalseller <- buybox_winner_likeli
ntest1 <- nrow(focal3P_prescribed)
predict_test_array <- matrix(0, ntest1, ntree)
dimY1 <- 1

for (i in seq_len(ntree)) {
  # Loading the saved tree; the original author notes that get(load()) works
  # in their setup whereas a bare load() does not.
  myfile1 <- file.path(
    "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\",
    paste0("tree", i, ".RData")
  )
  tree <- get(load(myfile1))

  # Column i holds tree i's output for the prescribed-price predictors.
  predict_test_array[, i] <- single_tree_prediction(
    tree,
    X_test = testX_focalseller,
    Variable_number = dimY1
  )
} # end of the per-tree loop

# Forest prediction per row = mean over trees; win = 1 above the threshold.
avg_predict_focal3p <- vapply(
  seq_len(ntest1),
  function(row_idx) mean(predict_test_array[row_idx, ]),
  numeric(1)
)
predict_bboxwin <- ifelse(avg_predict_focal3p > thres, 1, 0)

write.csv(
  cbind(time1, buybox_winner_likeli, predict_bboxwin),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictedwinvsactual_focal3p.csv"
)

# Take this data frame off the search path once its columns are no longer
# needed. Leaving it attached would let a column that is missing from a
# later data set silently resolve to this one.
detach(focal3P_prescribed)

# reading second 3P seller prescribed price to get probability of winning buy box
# Scores the second 3P seller's prescribed-price covariates with the saved
# trees and writes the thresholded prediction next to the observed outcome.

second3P_prescribedprice <- read.csv(
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\Managerial Implications\\bboxsecond3p_prescribcovar_breville.csv",
  header = TRUE,
  sep = ","
)
# Only rows 2..132 of the file are used.
second3P_prescribed <- second3P_prescribedprice[2:132, ]

attach(second3P_prescribed)

ntree <- 500
# Same ten predictors, in the same order, as the training matrix.
# NOTE(review): unlike trainX, missing values are not zero-filled here -
# confirm the file contains no NAs.
testX_second3p <- cbind(
  pricediff_tolowest,
  priceratio_tolowest,
  priceratio_toprevious,
  dayofmonth,
  seller_rate,
  num_seller_Posrating,
  num_seller_ratings,
  FBA,
  Freeship,
  Prime
)

# win likelihood with old price and not using forecasting tool
testY_second3p <- buybox_winner_likeli
ntest1 <- nrow(second3P_prescribed)
predict_test_array <- matrix(0, ntest1, ntree)
dimY1 <- 1

for (i in seq_len(ntree)) {
  # Loading the saved tree; the original author notes that get(load()) works
  # in their setup whereas a bare load() does not.
  myfile1 <- file.path(
    "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\",
    paste0("tree", i, ".RData")
  )
  tree <- get(load(myfile1))

  # Column i holds tree i's output for the prescribed-price predictors.
  predict_test_array[, i] <- single_tree_prediction(
    tree,
    X_test = testX_second3p,
    Variable_number = dimY1
  )
} # end of the per-tree loop

# Forest prediction per row = mean over trees; win = 1 above the threshold.
avg_predict_second3p <- vapply(
  seq_len(ntest1),
  function(row_idx) mean(predict_test_array[row_idx, ]),
  numeric(1)
)
predict_bboxwin <- ifelse(avg_predict_second3p > thres, 1, 0)

write.csv(
  cbind(time1, buybox_winner_likeli, predict_bboxwin),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictedwinvsactual_second3p.csv"
)

# Take this data frame off the search path once its columns are no longer
# needed. Leaving it attached would let a column that is missing from a
# later data set silently resolve to this one.
detach(second3P_prescribed)



### second 3p with larger price cut prescribed $20
# reading second 3P seller prescribed price to get probability of winning buy box
# Scores the $20 price-cut covariates of the second 3P seller with the saved
# trees and writes the thresholded prediction next to the observed outcome.

second3P_prescribedprice <- read.csv(
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\Managerial Implications\\bboxsecond3p_prescribcovar20dollars_breville.csv",
  header = TRUE,
  sep = ","
)
# Only rows 2..132 of the file are used.
second3P_prescribed <- second3P_prescribedprice[2:132, ]

# The most recently attached data frame is searched first, so the columns
# referenced below come from this $20 data set.
attach(second3P_prescribed)

ntree <- 500
# Same ten predictors, in the same order, as the training matrix.
# NOTE(review): unlike trainX, missing values are not zero-filled here -
# confirm the file contains no NAs.
testX_second3p <- cbind(
  pricediff_tolowest,
  priceratio_tolowest,
  priceratio_toprevious,
  dayofmonth,
  seller_rate,
  num_seller_Posrating,
  num_seller_ratings,
  FBA,
  Freeship,
  Prime
)

# win likelihood with old price and not using forecasting tool
testY_second3p <- buybox_winner_likeli
ntest1 <- nrow(second3P_prescribed)
predict_test_array <- matrix(0, ntest1, ntree)
dimY1 <- 1

for (i in seq_len(ntree)) {
  # Loading the saved tree; the original author notes that get(load()) works
  # in their setup whereas a bare load() does not.
  myfile1 <- file.path(
    "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\",
    paste0("tree", i, ".RData")
  )
  tree <- get(load(myfile1))

  # Column i holds tree i's output for the prescribed-price predictors.
  predict_test_array[, i] <- single_tree_prediction(
    tree,
    X_test = testX_second3p,
    Variable_number = dimY1
  )
} # end of the per-tree loop

# Forest prediction per row = mean over trees; win = 1 above the threshold.
avg_predict_second3p <- vapply(
  seq_len(ntest1),
  function(row_idx) mean(predict_test_array[row_idx, ]),
  numeric(1)
)
predict_bboxwin <- ifelse(avg_predict_second3p > thres, 1, 0)

write.csv(
  cbind(time1, buybox_winner_likeli, predict_bboxwin),
  file = "D:\\Amazon Price Dynamics\\New Data\\Managerial Implications_simulation\\RF_simulation_JMRnr\\predictedwinvsactual_second3p_20dollar.csv"
)

# Take the data frame attached in this section off the search path so its
# columns do not linger after the script finishes.
detach(second3P_prescribed)


